https://ismayc.github.io/talks/ness-infer/slide_deck.html#54
2019-10-28
https://ismayc.github.io/talks/ness-infer/slide_deck.html#54
Don’t look as good
Hard to build more complex plots, and fine-tune
ggplot2
What is a statistical graphic?
Take variables from a dataset
map them to aes()thetic attributes
geom_etric objects
How are variables mapped to aesthetic attributes of points?
Construct a graphic by adding modular pieces
ggplot(data, mapping)
Add ‘layers’ of geometric objects
Adjustments to axis scales, colors, labels, aesthetic mods
“Chaining” together components (use + rather than %>%)
# Keep the 1992 rows and add total GDP in billions (gdpPercap * pop / 1e9).
gap_92 <- gapminder %>%
  filter(year == 1992) %>%
  mutate(gdp = gdpPercap * pop / 1e9)

# Preview the first four rows.
gap_92 %>%
  head(4)
## # A tibble: 4 x 7
##   country     continent  year lifeExp      pop gdpPercap    gdp
##   <chr>       <chr>     <int>   <dbl>    <int>     <dbl>  <dbl>
## 1 Afghanistan Asia       1992    41.7 16317921      649.  10.6
## 2 Albania     Europe     1992    71.6  3326498     2497.   8.31
## 3 Algeria     Africa     1992    67.7 26298373     5023. 132.
## 4 Angola      Africa     1992    40.6  8735988     2628.  23.0
# Basic scatterplot: GDP on x, life expectancy on y.
ggplot(gap_92, mapping = aes(x = gdp, y = lifeExp)) +
  geom_point()

# Same scatterplot, with point shape mapped to continent.
ggplot(gap_92, mapping = aes(x = gdp, y = lifeExp, shape = continent)) +
  geom_point()

# Same scatterplot, with a log10-scaled x-axis.
ggplot(gap_92, mapping = aes(x = gdp, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
Change scaling of x- and y-axes (also reverse axes)
Change which colors etc. get applied to which values
The labs function adds custom axis labels and titles
ggplot(gap_92, mapping = aes(x = gdp, y = lifeExp)) +
geom_point() +
scale_x_log10() +
labs(x = 'Gross Domestic Product (Billions $)',
y = 'Life Expectancy at birth (years)',
title = 'Gapminder for 1992')
Scatterplot: geom_point()
Line graph: geom_line()
Smoothing functions: geom_smooth()
Text: geom_text()
Histogram: geom_histogram()
Boxplot: geom_boxplot()
Bar graph: geom_bar()
# Life expectancy over time for a single country.
df <- gapminder %>%
  filter(country == 'Romania')

ggplot(df, mapping = aes(x = year, y = lifeExp)) +
  geom_line()
We can add as many geoms to a plot as we want, stacked on as ‘layers’ in order
# Layers are drawn in the order they are added: line first, points on top.
ggplot(df, mapping = aes(x = year, y = lifeExp)) +
  geom_line() +
  geom_point()
What if we had multiple data points per year?
# Two countries, so every year now has two observations.
df <- gapminder %>%
  filter(country %in% c('Romania', 'Thailand'))

# No grouping is specified, so a single line is drawn through all rows.
ggplot(df, mapping = aes(x = year, y = lifeExp)) +
  geom_line() +
  geom_point()
Need to separate them by country (group aesthetic)
# The group aesthetic gives each country its own line.
ggplot(df, mapping = aes(x = year, y = lifeExp, group = country)) +
  geom_line() +
  geom_point()
Often useful to color lines by group, use color aesthetic with a categorical variable and it automatically groups
# Mapping color to the categorical country column also groups the lines.
ggplot(df, mapping = aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  geom_point()
Mappings are inherited from ggplot, but you can override this for individual geoms
ggplot(df, mapping = aes(x = year, y = lifeExp)) + geom_line(mapping = aes(color = country)) + geom_point()
# Constant (non-mapped) appearance settings are passed outside aes().
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here to preserve the original call.
ggplot(df, mapping = aes(x = year, y = lifeExp, color = country)) +
  geom_line(linetype = 2, size = 1.5) +
  geom_point(color = 'black', size = 3, alpha = 0.75)
If we map a continuous variable to color it won’t group automatically
# color is mapped to the numeric gdpPercap column; no group is set here.
ggplot(df, mapping = aes(x = year, y = lifeExp, color = gdpPercap)) +
  geom_line() +
  geom_point(size = 3)
We need to specify group manually
# Group explicitly by country while coloring by the numeric gdpPercap.
ggplot(
  df,
  mapping = aes(x = year, y = lifeExp, group = country, color = gdpPercap)
) +
  geom_line() +
  geom_point(size = 3)
Assume continuous map for numeric data, discrete map for strings
Make numeric data into factors if you want discrete colors
# Keep three selected years.
my_df <- gapminder %>%
  filter(year %in% c(1957, 1977, 1997))

# factor(year) turns the numeric year into a discrete color scale;
# labs() restores a readable legend title.
ggplot(
  my_df,
  mapping = aes(x = gdpPercap, y = lifeExp, color = factor(year))
) +
  geom_point() +
  scale_x_log10() +
  labs(color = 'year')
We can use scale_color_manual to set the color of each group manually
# Named vector: one color per country.
my_cols <- c(Romania = 'green', Thailand = 'orange')

ggplot(df, mapping = aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  scale_color_manual(values = my_cols)
scale_color_brewer offers some useful default color schemes
# Use the 'Dark2' ColorBrewer palette for the country colors.
ggplot(df, mapping = aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  scale_color_brewer(palette = 'Dark2')
https://www.r-bloggers.com/a-detailed-guide-to-ggplot-colors/
Practice
# Line and point layers on a log10 x-axis, with custom axis labels.
ggplot(gap_92, mapping = aes(x = gdp, y = lifeExp)) +
  geom_line() +
  geom_point() +
  scale_x_log10() +
  labs(
    x = 'Gross Domestic Product (Billions $)',
    y = 'Life Expectancy at birth (years)'
  )
# Scatterplot with the default smoother.
ggplot(gap_92, mapping = aes(x = gdp, y = lifeExp)) +
  geom_point() +
  geom_smooth() +
  scale_x_log10() +
  labs(
    x = 'Gross Domestic Product (Billions $)',
    y = 'Life Expectancy at birth (years)'
  )

# Same plot with a linear-model fit (method = 'lm').
ggplot(gap_92, mapping = aes(x = gdp, y = lifeExp)) +
  geom_point() +
  geom_smooth(method = 'lm') +
  scale_x_log10() +
  labs(
    x = 'Gross Domestic Product (Billions $)',
    y = 'Life Expectancy at birth (years)'
  )

# All years, points colored by continent.
ggplot(gapminder, mapping = aes(x = year, y = lifeExp, color = continent)) +
  geom_point()

# All years, a smoothed trend colored by continent.
ggplot(gapminder, mapping = aes(x = year, y = lifeExp, color = continent)) +
  geom_smooth()
Practice
# Keep observations from before 1970.
gap_early <- gapminder %>%
  filter(year < 1970)

# One panel per continent.
ggplot(gap_early, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_x_log10() +
  facet_wrap(~continent)
# Grid of panels: one row per year, one column per continent.
ggplot(gap_early, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_x_log10() +
  facet_grid(year ~ continent)
Practice
# NOTE(review): gap_df is not defined in the visible snippets; presumably a
# gapminder subset that has a gdp column -- confirm.

# Label every point with its country name via geom_text().
ggplot(gap_df, mapping = aes(x = gdp, y = lifeExp, label = country)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  scale_x_log10() +
  geom_text() +
  labs(
    x = 'Gross Domestic Product (Billions $)',
    y = 'Life Expectancy at birth (years)'
  )

library(ggrepel)

# Same plot, with the labels drawn by ggrepel's geom_label_repel() instead.
ggplot(gap_df, mapping = aes(x = gdp, y = lifeExp)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  scale_x_log10() +
  labs(
    x = 'Gross Domestic Product (Billions $)',
    y = 'Life Expectancy at birth (years)'
  ) +
  geom_label_repel(aes(label = country), size = 2.5)
# Histogram of GDP per capita on the raw scale.
ggplot(gapminder, mapping = aes(x = gdpPercap)) +
  geom_histogram()

# Same histogram on a log10 x-axis.
ggplot(gapminder, mapping = aes(x = gdpPercap)) +
  geom_histogram() +
  scale_x_log10()

# Piped form, with the number of bins set explicitly.
gapminder %>%
  ggplot(aes(gdpPercap)) +
  geom_histogram(bins = 100) +
  scale_x_log10()

# color mapped to continent.
ggplot(gapminder, mapping = aes(x = gdpPercap, color = continent)) +
  geom_histogram() +
  scale_x_log10()

# fill mapped to continent.
ggplot(gapminder, mapping = aes(x = gdpPercap, fill = continent)) +
  geom_histogram() +
  scale_x_log10()

# Density curve instead of a histogram.
ggplot(gapminder, mapping = aes(x = gdpPercap)) +
  geom_density() +
  scale_x_log10()
fill colors
ggplot(gapminder, mapping = aes(x = gdpPercap, fill = continent)) + geom_density(alpha = 0.5) + scale_x_log10()
# Boxplot of GDP per capita by continent, on a log10 y-axis.
ggplot(gapminder, mapping = aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10()

# Same comparison drawn as violins.
ggplot(gapminder, mapping = aes(x = continent, y = gdpPercap)) +
  geom_violin() +
  scale_y_log10()
Need to use the idea of a factor
Factors used to encode categorical variables, specify the possible ‘levels’, and optionally an ordering
# Desired ordering of the continent levels.
cont_order <- c('Oceania', 'Europe', 'Americas', 'Asia', 'Africa')

# Re-encode continent as a factor with the levels in that order.
gap_cat <- gapminder %>%
  mutate(continent = factor(continent, levels = cont_order))

# Violin plot using the re-levelled continent column.
ggplot(gap_cat, mapping = aes(x = continent, y = gdpPercap)) +
  geom_violin() +
  scale_y_log10()
library(ggbeeswarm)

# Beeswarm plot: small, semi-transparent points per continent.
ggplot(gap_cat, mapping = aes(x = continent, y = gdpPercap)) +
  geom_beeswarm(size = 0.5, alpha = 0.75) +
  scale_y_log10()
# Show the first five rows of the built-in mtcars data set.
head(mtcars, n = 5)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
Now I want to make a boxplot of horsepower for cars with different numbers of cylinders
# cyl is passed as-is (a numeric column) on the x-axis.
ggplot(mtcars, mapping = aes(x = cyl, y = hp)) +
  geom_boxplot()

# cyl is converted to a factor, so each cylinder count is its own category.
ggplot(mtcars, mapping = aes(x = factor(cyl), y = hp)) +
  geom_boxplot()
# Countries in the Americas in 1982.
gap_82 <- gapminder %>%
  filter(year == 1982, continent == 'Americas')

# geom_bar() is called with its default stat while y is mapped; the call is
# kept exactly as in the original slide.
ggplot(gap_82, mapping = aes(x = country, y = gdpPercap)) +
  geom_bar()
stat = 'identity'
ggplot(gap_82, mapping = aes(x = country, y = gdpPercap)) + geom_bar(stat = 'identity')
You can customize MANY details of the plot using the theme function
It’s a bit complicated at first, but most common changes are easy to google.
ggsave
ggplot(gapminder, mapping = aes(x = continent, y = gdpPercap)) +
geom_violin() +
scale_y_log10()
ggsave(filename = here::here('results', 'my_fig.png'))
Practice
Tool for making interactive plots in R
Can convert ggplot2 plots into interactive form using the ggplotly function
Practice
# Add the theme_minimal() theme to an existing plot object.
# NOTE(review): my_plot is not defined in the visible snippets; presumably a
# previously stored ggplot object -- confirm.
my_plot + theme_minimal()
library(ggpubr)
# Pairs of continents to compare.
my_comparisons <- list(
  c('Africa', 'Asia'),
  c('Europe', 'Oceania')
)

# Violin plot with Wilcoxon test annotations for the listed pairs.
ggplot(gapminder, mapping = aes(x = continent, y = gdpPercap)) +
  geom_violin() +
  scale_y_log10() +
  stat_compare_means(
    method = 'wilcox.test',
    comparisons = my_comparisons
  )
Great tool for combining multiple ‘panels’ into one plot
library(cowplot)
# Panel A: mpg against displacement.
p1 <- ggplot(mtcars, aes(disp, mpg)) +
  geom_point()

# Panel B: mpg against quarter-mile time.
p2 <- ggplot(mtcars, aes(qsec, mpg)) +
  geom_point()

# Combine both panels into a single figure, labelled A and B.
plot_grid(p1, p2, labels = c('A', 'B'))